{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 秩和比综合评价法(Rank Sum Ratio, RSR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 编秩函数\n",
    "def get_rank(data, columns, ascending=True, R=np.zeros(1)):\n",
    "    if data.ndim == 1:\n",
    "        tempdata = np.array(data)[:,None]\n",
    "    if not R.any():\n",
    "        R = np.zeros(tempdata.shape)\n",
    "    for i in columns:\n",
    "        arg = tempdata[:,i].argsort(axis=0)\n",
    "        if not ascending:\n",
    "            arg = arg[::-1]\n",
    "        begin, end = 0, 0\n",
    "        # 找从begin开始相同项, 用end标记最后一个相同项的下一个\n",
    "        while begin < len(arg):\n",
    "            while end < len(arg) and tempdata[arg[end]][i] == tempdata[arg[begin]][i]:\n",
    "                end += 1\n",
    "            for j in range(begin, end):\n",
    "                R[arg[j]][i] = (begin + end + 1) / 2\n",
    "            begin = end\n",
    "    return R.reshape(data.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "        x1    x2    x3    x4    x5    x6     WRSR\n",
      "1984   8.0   2.0   4.5   6.0   5.0   2.0  0.35820\n",
      "1985  10.0   3.5   1.0   1.0   1.0   5.5  0.35975\n",
      "1983   6.0   1.0   8.0   7.5   9.0   5.5  0.45385\n",
      "1986   9.0   3.5   3.0   7.5   8.0   3.0  0.47070\n",
      "1988   4.0   5.0   4.5   2.0   2.0  10.0  0.50420\n",
      "1992   1.5  10.0   2.0   4.0   4.0   1.0  0.55345\n",
      "1989   5.0   7.0   7.0   3.0   3.0   9.0  0.63400\n",
      "1987   7.0   6.0   9.0   5.0   7.0   8.0  0.68050\n",
      "1991   3.0   8.0   6.0   9.0  10.0   5.5  0.71695\n",
      "1990   1.5   9.0  10.0  10.0   6.0   5.5  0.76840 \n",
      "\n",
      "        f    cf      p    Probit   WRSRfit  排序\n",
      "1984  1.0   1.0  0.100  3.718448  0.337123  10\n",
      "1985  1.0   2.0  0.200  4.158379  0.400506   9\n",
      "1983  1.0   3.0  0.300  4.475599  0.446209   8\n",
      "1986  1.0   4.0  0.400  4.746653  0.485261   7\n",
      "1988  1.0   5.0  0.500  5.000000  0.521762   6\n",
      "1992  1.0   6.0  0.600  5.253347  0.558263   5\n",
      "1989  1.0   7.0  0.700  5.524401  0.597315   4\n",
      "1987  1.0   8.0  0.800  5.841621  0.643018   3\n",
      "1991  1.0   9.0  0.900  6.281552  0.706401   2\n",
      "1990  1.0  10.0  0.975  6.959964  0.804143   1\n"
     ]
    }
   ],
   "source": [
    "# 例14.7\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from scipy.stats import norm\n",
    "from sklearn import linear_model\n",
    "\n",
    "data = np.loadtxt('14.D 医院工作质量统计指标.txt')\n",
    "w = data[-1]\n",
    "data = data[:-1]\n",
    "\n",
    "# 编秩\n",
    "R = np.zeros(data.shape)\n",
    "# 效益型指标编秩\n",
    "R = get_rank(data, columns=[1,5], ascending=False, R=R)\n",
    "# 效益型指标编秩\n",
    "R = get_rank(data, columns=[0,2,3,4], ascending=True, R=R)\n",
    "\n",
    "# 求加权秩和比\n",
    "WRSR = np.matmul(R, w)/R.shape[0]\n",
    "\n",
    "# R_WRSR列名\n",
    "R_WRSR_columns = ['x' + str(i+1) for i in range(R.shape[1])] + ['WRSR']\n",
    "# R_WRSR行名\n",
    "R_WRSR_index = [str(i) for i in range(1983, 1993)]\n",
    "# 构建一个DataFrame. 课本表14.19\n",
    "R_WRSR = pd.DataFrame(np.c_[R, WRSR], columns=R_WRSR_columns, index=R_WRSR_index)\n",
    "R_WRSR_sorted = R_WRSR.sort_values(by='WRSR', ascending=True)\n",
    "print(R_WRSR_sorted, '\\n')\n",
    "\n",
    "# f列名\n",
    "f_columns = ['f', 'cf', 'p', 'Probit', 'WRSRfit', '排序']\n",
    "f = pd.DataFrame(np.c_[np.ones((R_WRSR_sorted.shape[0], 1)), \n",
    "                       [i for i in range(1, 11)],\n",
    "                       np.zeros((R_WRSR_sorted.shape[0], len(f_columns)-2))],\n",
    "                 index=R_WRSR_sorted.index, columns=f_columns)\n",
    "# 计算累积频率\n",
    "f['p'] = f['cf'] / R.shape[0]\n",
    "# 对最后一个累积频率修正\n",
    "f['p'].iloc[-1] = 1 - 1/(4*R.shape[0])\n",
    "# 计算概率单位, 按正态分布\n",
    "f['Probit'] = norm.isf(1-f['p'], 0, 1)+5\n",
    "# 以Probit为自变量, RSR/WRSR为因变量, 计算回归方程\n",
    "reg = linear_model.LinearRegression()\n",
    "reg.fit(np.array(f['Probit']).reshape((10, 1)), R_WRSR_sorted['WRSR'])\n",
    "# 得到reg.coef_是系数, reg.intercept_是常数项. 以此计算f['WRSRfit']\n",
    "f['WRSRfit'] = f['Probit'] * reg.coef_[0] + reg.intercept_\n",
    "f['排序'] = np.array(f['WRSRfit'].argsort())[::-1] + 1\n",
    "print(f)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
